import re
import json
import base64
import os
from collections import defaultdict
import logging

# Translation table that deletes the punctuation symbols stripped by clean_str.
_STRIPPED_SYMBOLS = str.maketrans('', '', "*/'\"()[]\\#&.,:?!")


def clean_str(text):
    """Return an ASCII-only copy of *text* with selected punctuation removed.

    Steps, in order:
      1. Delete each of the symbols ``* / ' " ( ) [ ] \\ # & . , : ? !``.
      2. Replace every run of non-ASCII characters with a single space.
      3. Replace every run of ASCII control characters (everything below
         0x20 except tab, line feed and carriage return) with a single space.

    Args:
        text (str): The string to normalise.

    Returns:
        str: The cleaned, pure-ASCII string.
    """
    without_symbols = text.translate(_STRIPPED_SYMBOLS)
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', without_symbols)
    # After the previous pass only ASCII remains, so the only characters that
    # are still unwanted are the non-whitespace control codes.
    return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]+', ' ', ascii_only)

def calculate_overlap_percentage(sentence1, sentence2):
    """Score the word overlap between two sentences as a fraction in [0, 1].

    Both sentences are passed through ``clean_str`` and lower-cased. If they
    are identical once all whitespace is removed, the score is ``1``.
    Otherwise the score is the number of distinct shared words divided by the
    size of the *smaller* of the two word sets, or ``0`` when either sentence
    has no words.

    Args:
        sentence1 (str): First sentence.
        sentence2 (str): Second sentence.

    Returns:
        int | float: Overlap score (a fraction, not a percentage).
    """
    lowered_a = clean_str(sentence1).lower()
    lowered_b = clean_str(sentence2).lower()

    # Texts that differ only in whitespace count as a perfect match.
    if re.sub(r'\s+', '', lowered_a) == re.sub(r'\s+', '', lowered_b):
        return 1

    tokens_a = set(lowered_a.split())
    tokens_b = set(lowered_b.split())
    smaller_size = min(len(tokens_a), len(tokens_b))
    if smaller_size <= 0:
        # Nothing to compare against; also avoids dividing by zero.
        return 0
    return len(tokens_a & tokens_b) / smaller_size


# Action grammar understood by extract_information. Dict order is the
# tie-break when two patterns match at the same position.
_ACTION_PATTERNS = {
    "click": re.compile(r"Click \[?(\d+)\]?.*"),
    "type": re.compile(r"Type \[?(\d+)\]?[; ]+\[?(.[^\]]*)\]?.*"),
    "scroll": re.compile(r"Scroll (?:\[?(\d+|WINDOW)\]?[; ]+)?\[?(up|down)\]?.*"),
    "wait": re.compile(r"^Wait"),
    "goback": re.compile(r"^GoBack"),
    "bing": re.compile(r"^Bing"),
    "answer": re.compile(r"ANSWER[; ]+\[?(.[^\]]*)\]?.*"),
}

# Actions whose result is just the tuple of captured groups.
_NO_CONTENT_ACTIONS = frozenset({"click", "wait", "goback", "bing"})


def extract_information(text):
    """Parse an action string into ``(action_key, info)``.

    Every known action pattern is searched for in *text* and the match that
    starts earliest wins. This prevents a keyword that merely appears inside
    the payload of another action (e.g. ``ANSWER; [Type 2 diabetes ...]``)
    from being mistaken for the action itself.

    Args:
        text (str): The action text, e.g. ``"Type [5]; [hello]"``.

    Returns:
        tuple: ``(None, None)`` when nothing matches or *text* is empty.
        Otherwise ``(key, info)`` where ``info`` is

        * the tuple of captured groups for ``click`` / ``wait`` / ``goback``
          / ``bing`` (``("12",)`` for a click, ``()`` for the others),
        * ``{"number": ..., "content": ...}`` for ``type`` and ``scroll``
          (``number`` defaults to ``"WINDOW"`` for a scroll with no target),
        * ``{"content": ...}`` for ``answer``.
    """
    if not text:
        return None, None

    best_key, best_match = None, None
    for key, pattern in _ACTION_PATTERNS.items():
        match = pattern.search(text)
        # Strict "<" keeps the first-declared pattern on ties.
        if match and (best_match is None or match.start() < best_match.start()):
            best_key, best_match = key, match

    if best_match is None:
        return None, None

    if best_key in _NO_CONTENT_ACTIONS:
        return best_key, best_match.groups()

    if best_key == "answer":
        return best_key, {"content": best_match.group(1)}

    number = best_match.group(1)
    if best_key == "scroll" and number is None:
        number = "WINDOW"
    return best_key, {"number": number, "content": best_match.group(2)}
    
class PromptProcessor:
    """Build chat messages for a web-browsing LLM agent and parse its replies.

    The class formats observations (task, URL, accessibility tree, screenshot
    path) into chat-style message lists, keeps the number of attached
    screenshots bounded, builds evaluator/grader prompts, and extracts the
    chosen action from a model response.
    """

    # Marker texts used when old observations are collapsed in clip_messages.
    _OMITTED_NOTE = "Observation omitted for previous steps."
    _SCREENSHOT_NOTE = "See attachment for screenshot."
    _EXTRA_INFO_MARKER = "Here're some additional information"
    _INITIAL_MARKER = "Now solve the following task."

    def __init__(self, prompt_path, evaluator_prompt_path, multi_agent, max_attached_imgs=5, verbose=False):
        """
        Initialize the PromptProcessor class.

        Args:
            prompt_path (str): Path to a JSON file with the prompt templates.
                If it cannot be loaded, built-in fallback actor templates are
                used and ``multi_agent`` is left empty.
            evaluator_prompt_path (str): Path to the evaluator prompt text file.
            multi_agent (str): ``|``-separated names of auxiliary agents
                (e.g. ``"plan|summary"``); an empty string disables them.
            max_attached_imgs (int): Maximum number of images to attach to messages.
            verbose (bool): Print debugging output and dump messages to disk.
        """
        self.max_attached_imgs = max_attached_imgs
        self.double_check = False
        self.verbose = verbose
        self.multi_agent = []

        self.use_image = True
        try:
            with open(prompt_path, 'r', encoding='utf-8') as f:
                self.prompt_templates = json.load(f)
            self.multi_agent = multi_agent.split("|")
            logging.info(f"Load prompt from path: {prompt_path}")
        except Exception as e:
            # Deliberate best-effort: fall back to the packaged actor prompts.
            logging.error(f"Error loading prompt templates: {e}")
            from tti.models.claude_prompts import SYSTEM_PROMPT, SYSTEM_WEBARENA_PROMPT

            self.prompt_templates = {"actor": {
                "initial": SYSTEM_PROMPT.strip() + "\n\nNow given a task: {task_goal} Please interact with {url} and get the answer.\nAccessibility tree of current viewpoint:\n{accessibility_tree}",
                "observation": "Observation:{warn_obs} Please analyze the attached screenshot and give the Thought and Action.\nAccessibility tree of current viewpoint:\n{accessibility_tree}",
                "pdf_observation": "Observation: {pdf_obs} Please analyze the response given by Assistant, then consider whether to continue iterating or not.\nAccessibility tree of current viewpoint:\n{accessibility_tree}",
                "error": "The action you have chosen cannot be executed. Please double-check if you have selected the wrong Numerical Label or Action or Action format. Then provide the revised Thought and Action.",
                "tree_indicator":"\nAccessibility tree of current viewpoint:",
                "pattern": r'Thought:|Action:',
                "hint":{},
            }}

        # Explicit encoding so the prompt is decoded the same way on every platform.
        with open(evaluator_prompt_path, "r", encoding="utf-8") as fb:
            self.evaluator_prompt = fb.read()
        self.evaluator_prompt += "\nNow analyze the following case.\n\nTask: {task_goal}\n{reference_answer}URL of the last webpage: {url}\nAccessibility tree of the last webpage:\n{accessibility_tree}\nResult response: {answer}\nLast {num} screenshots upon task completion:"

        logging.info(f"Multi-agent: {self.multi_agent}")

    def process_batch_evaluation(self, batch_observation, batch_eval_info):
        """
        Process batch observations for evaluation, tracking which indices contain valid data.

        Entries where either the observation or the eval info is ``None`` are
        skipped, as are entries whose processing raises.

        Returns:
            tuple: (batch_msgs, valid_indices) where valid_indices tracks which original indices
                have valid observations and are included in batch_msgs
        """
        batch_msgs = []
        valid_indices = []

        for i, (observation, eval_info) in enumerate(zip(batch_observation, batch_eval_info)):
            if observation is None or eval_info is None:
                continue
            try:
                msg = self.process_evaluation(observation, eval_info)
            except Exception as e:
                # Failed items are left out of the results on purpose.
                logging.error(f"Error processing evaluation for observation {i}: {str(e)}")
                continue
            batch_msgs.append(msg)
            valid_indices.append(i)

        return batch_msgs, valid_indices

    def create_dummy_evaluation_message(self):
        """Create a placeholder evaluation message for missing or unusable observations.

        Returns:
            list: A single-element message list with a text-only user message.
        """
        dummy_task = "Task was not completed or failed"
        dummy_url = "http://example.com"
        dummy_tree = "No accessibility tree available"
        dummy_answer = "N/A"
        dummy_ref_answer = ""

        msg = self.evaluator_prompt.replace("{url}", dummy_url)\
                                .replace("{task_goal}", dummy_task)\
                                .replace("{accessibility_tree}", dummy_tree)\
                                .replace("{answer}", dummy_answer)\
                                .replace("{num}", "0")\
                                .replace("{reference_answer}", dummy_ref_answer)

        return [
            {
                'role': 'user',
                'content': [
                    {'type': 'text', 'text': msg + "\nYour verdict:\n"}
                ]
            }
        ]

    def process_evaluation(self, observation, eval_info):
        """Build the evaluator message for one finished task.

        The last ``max_attached_imgs`` files named ``screenshot<N>.png`` in
        ``observation['task_dir']`` (ordered by ``N``) are attached by path.

        Args:
            observation (dict | None): Reads ``task_dir``, ``task``, ``tree``
                and ``url``.
            eval_info (dict | None): Reads ``answer`` and ``reference_answer``.

        Returns:
            list: A single-element message list. A dummy message is returned
            when inputs are missing, ``task_dir`` is empty, or building the
            message fails.
        """
        if observation is None or eval_info is None:
            return self.create_dummy_evaluation_message()

        task_dir = observation.get('task_dir', '')
        if not task_dir:
            return self.create_dummy_evaluation_message()

        answer = eval_info.get('answer')
        answer = 'N/A' if answer is None else str(answer)

        # Only mention a reference answer when there actually is one.
        reference_answer = eval_info.get('reference_answer')
        if reference_answer:
            reference_answer = 'The reference answer is: ' + str(reference_answer) + "\n"
        else:
            reference_answer = ''

        try:
            # Ignore unrelated files instead of failing the whole evaluation.
            name_matches = (re.fullmatch(r'screenshot(\d+)\.png', name) for name in os.listdir(task_dir))
            screenshots = sorted(int(m.group(1)) for m in name_matches if m)
            num = max(0, min(self.max_attached_imgs, len(screenshots)))
            # Not ``[-num:]``: that would return every screenshot when num == 0.
            screenshots = screenshots[len(screenshots) - num:]

            task_goal = observation.get('task', '')
            accessibility_tree = observation.get('tree', '')
            url = observation.get('url', '')
            msg = self.evaluator_prompt.replace("{url}", url).replace("{task_goal}", task_goal).replace("{accessibility_tree}", accessibility_tree).replace("{answer}", answer).replace("{num}", str(num)).replace("{reference_answer}", reference_answer)

            whole_content_img = [
                {
                    'type': 'image',
                    'source': {
                        'type': 'path',
                        'path': os.path.join(task_dir, f'screenshot{screenshot_id}.png'),
                    },
                }
                for screenshot_id in screenshots
            ]

            return [
                {
                    'role': 'user',
                    'content': (
                        [{'type': 'text', 'text': msg}]
                        + whole_content_img +
                        [{'type': 'text', 'text': "Your verdict:\n"}]
                    )
                }
            ]
        except Exception as e:
            logging.error(f"Error creating evaluation message: {str(e)}")
            return self.create_dummy_evaluation_message()

    def process_batch_observation(self, batch_observation, batch_history, time_step=1, role="actor"):
        """Apply :meth:`process_observation` to each observation/history pair.

        Returns:
            list: One message history per observation.
        """
        return [
            self.process_observation(observation, history, time_step, role)
            for observation, history in zip(batch_observation, batch_history)
        ]

    def process_observation(self, observation, history, time_step=1, role="actor"):
        """
        Process an observation dictionary and extend the message history with it.

        Args:
            observation (dict): Dictionary containing observation data including:
                - task (str): Task description
                - image (str): Path to screenshot
                - web_name (str): Name of the website being browsed
                - tree (str): Accessibility tree information
                - url (str): Current URL
                - pdf_obs (str, optional): PDF observation text
                - warn_obs (str, optional): Warning observation text
                - fail_obs (str, optional): Failure text appended to the message
                - plan (str, optional): Reference plan for the first step
                - task_dir (str): Directory for message dumps (verbose only)
                - one entry per name in ``self.multi_agent`` (optional)
            history (list): Previous messages. Ignored and replaced when
                ``time_step == 1``; otherwise mutated in place.
            time_step (int): Current time step in the interaction
            role (str): Passed through to :meth:`clip_messages`.

        Returns:
            list: The message history ending with the new user message.
        """
        task_goal = observation.get('task', '')
        image_path = observation.get('image')
        accessibility_tree = observation.get('tree', '')
        url = observation.get('url', '')
        pdf_obs = observation.get('pdf_obs') or ''
        warn_obs = observation.get('warn_obs') or ''
        fail_obs = observation.get('fail_obs') or ''
        task_domain = observation.get('web_name', '')

        if time_step == 1:
            history = self.format_initial_message(task_goal, task_domain, url, image_path, accessibility_tree, fail_obs)
            plan = observation.get('plan', '')
            plan_text = f"\n\nHere's a reference plan to guide your planning: {plan}" if plan else ""
            first_item = history[-1]['content'][0]
            first_item['text'] = first_item['text'].replace("{plan}", plan_text)
        else:
            if history is None:
                history = []
            new_msg = self.format_observation_message(task_goal, url, pdf_obs, warn_obs, image_path, accessibility_tree, fail_obs)
            last_text = history[-1]['content'][0]['text'] if history else ''
            if "Action: ANSWER" in last_text or "Action:\nANSWER" in last_text:
                if self.verbose:
                    print("Last action is ANSWER")

                new_msg['content'][0]['text'] += "\n\nImportant: You returned an answer in the last step. Let's pause, check the web page, and think again. If you still think the task is finished, double-check your answer, revise it if need, and return a final answer. If not, continue the task." 

            if len(history) >= 3:
                history = self.clip_messages(history, role)
            history.append(new_msg)

        latest = history[-1]['content'][0]

        # ``multi_agent`` is [] with fallback templates and [""] when disabled.
        if self.multi_agent and self.multi_agent[0]:
            additional_info = f"Here're some additional information the user has provided.\n\n"
            for agent in self.multi_agent:
                info = (observation.get(agent) or '').strip()
                if not info:
                    continue
                if agent == "plan":
                    additional_info += f"Tentative plan: {info}\n\n"
                if agent == "summary":
                    additional_info += f"Summary of previous observation: {info}\n\n"
                if agent == "error":
                    if "no error" in info.lower():
                        continue
                    additional_info += f"Potential issue: {info}\n\n"
            latest['text'] = latest['text'] + "\n\n" + additional_info.strip()

        if fail_obs:
            latest['text'] = latest['text'] + fail_obs

        if self.verbose:
            # Print from the URL line onward when present, else the whole text.
            url_at = latest['text'].find("Current URL:")
            print(latest['text'][url_at:] if url_at != -1 else latest['text'])

            with open(os.path.join(observation['task_dir'], f"msg{time_step}.json"), "w") as f:
                json.dump(history, f, indent=4)

        return history

    def process_batch_agent_call(self, batch_observation, new_past_act):
        """Build the prompts for every auxiliary agent and every observation.

        Args:
            batch_observation (list[dict]): Observations; reads ``task``,
                ``image``, ``tree``, ``url``, ``summary`` and ``plan``.
            new_past_act (list): Past-action text per observation; a falsy
                entry is replaced by a "no action yet" sentence.

        Returns:
            defaultdict: Maps agent name to a list of ``[system, user]``
            message lists, one per observation.
        """
        agent_dict = defaultdict(list)
        for observation, past in zip(batch_observation, new_past_act):
            task_goal = observation.get('task', '')
            image_path = observation.get('image')
            accessibility_tree = observation.get('tree', '')
            url = observation.get('url', '')

            # Look the template defaults up lazily: not every template file
            # defines "summary"/"plan", and the observation may supply them.
            summary = observation.get('summary')
            if summary is None:
                summary = self.prompt_templates.get("summary", {}).get("initial", "")
            plan = observation.get('plan')
            if plan is None:
                plan = self.prompt_templates.get("plan", {}).get("initial", "")
            past_actions = past if past else "None. The user has not yet performed any action."

            for agent in self.multi_agent:
                if not agent:
                    # "" is what an empty ``multi_agent`` string splits into.
                    continue
                base_text = self.prompt_templates[agent]["observation"].replace("{url}", url).replace("{task_goal}", task_goal).replace("{accessibility_tree}", accessibility_tree).replace("{summary}", summary).replace("{plan}", plan).replace("{past_actions}", past_actions)

                curr_msg = {
                    'role': 'user',
                    'content': [
                        {'type': 'text', 'text': base_text},
                    ]
                }

                if self.use_image and image_path:
                    curr_msg['content'].append({
                        'type': 'image',
                        'source': {'type': 'path', 'path': image_path},
                    })

                msg = [{
                    "role": "system",
                    "content": [{"type": "text", "text": "You are a helpful assistant."}]
                }, curr_msg]

                agent_dict[agent].append(msg)
        return agent_dict

    def format_initial_message(self, task_goal, task_domain, url, image_path, accessibility_tree, fail_obs):
        """
        Format the initial message for the LLM.

        Args:
            task_goal (str): Task description
            task_domain (str): Website name used to pick a domain-specific
                hint; falls back to the "general" hint, then to no hint.
            url (str): Current URL
            image_path (str): Path to the screenshot, attached when set
            accessibility_tree (str): Accessibility tree information
            fail_obs (str): Unused here.

        Returns:
            list: ``[system_message, user_message]``
        """
        role = "actor"
        hints = self.prompt_templates[role].get("hint") or {}
        if task_domain in hints:
            hint = hints[task_domain]
        else:
            # The fallback templates ship an empty hint table.
            hint = hints.get("general", "")
        init_msg = self.prompt_templates[role]["initial"].replace("{url}", url).replace("{task_goal}", task_goal).replace("{accessibility_tree}", accessibility_tree).replace("{hint}", hint)

        init_msg_format = [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are a helpful assistant."}]
            },
            {
                'role': 'user',
                'content': [
                    {'type': 'text', 'text': init_msg},
                ]
            },
        ]
        if self.use_image and image_path:
            init_msg_format[-1]['content'].append({
                'type': 'image',
                'source': {'type': 'path', 'path': image_path},
            })

        return init_msg_format

    def format_observation_message(self, task_goal, url, pdf_obs, warn_obs, image_path, accessibility_tree, fail_obs):
        """
        Format an observation message for the LLM.

        Args:
            task_goal (str): Task description
            url (str): Current URL
            pdf_obs (str): PDF observation text; when non-empty the
                "pdf_observation" template is used
            warn_obs (str): Warning observation text, if any
            image_path (str): Path to the screenshot, attached when set
            accessibility_tree (str): Accessibility tree information
            fail_obs (str): Unused here.

        Returns:
            dict: Formatted observation message for the LLM
        """
        role = "actor"
        warn_obs = warn_obs or ''
        template_key = "pdf_observation" if pdf_obs else "observation"
        base_text = self.prompt_templates[role][template_key].replace("{url}", url).replace("{task_goal}", task_goal).replace("{accessibility_tree}", accessibility_tree).replace("{warn_obs}", warn_obs)
        if pdf_obs:
            base_text = base_text.replace("{pdf_obs}", pdf_obs)

        curr_msg = {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': base_text},
            ]
        }

        if self.use_image and image_path:
            curr_msg['content'].append({
                'type': 'image',
                'source': {'type': 'path', 'path': image_path},
            })

        return curr_msg

    def format_error_message(self, error_text, role):
        """
        Format an error message for the LLM.

        Args:
            error_text (str): The error text to include in the message.
            role (str): Template group whose "error" template is used.

        Returns:
            dict: Formatted error message for the LLM (``content`` is a plain string).
        """
        return {
            'role': 'user',
            'content': self.prompt_templates[role]["error"].replace("{error_text}", error_text)
        }

    def _collapse_observation(self, text):
        """Return *text* with its observation body replaced by a short note."""
        if self._OMITTED_NOTE in text:
            # Already collapsed on an earlier step.
            return text

        note = f"{self._OMITTED_NOTE} {self._SCREENSHOT_NOTE}"
        extra_at = text.find(self._EXTRA_INFO_MARKER)

        if self._INITIAL_MARKER in text:
            # Initial message: keep the instructions in front of the observation.
            cut = text.find("Current URL:" if extra_at != -1 else "Screenshot of current viewpoint:")
            if cut == -1:
                # Unknown layout; leave the text alone rather than crash.
                return text
            if extra_at != -1:
                return text[:cut] + note + " " + text[extra_at:]
            return text[:cut] + note

        if extra_at != -1:
            return note + " " + text[extra_at:]
        return note

    def clip_messages(self, messages, role="actor"):
        """
        Shrink old user messages and cap the number of attached images.

        Every text item of every user message has its observation collapsed
        to a short note (instructions and "additional information" are kept).
        If more than ``max_attached_imgs`` images remain, images are removed
        starting from the oldest message until the limit is met. The message
        dicts and their content lists are modified in place.

        Args:
            messages (list): List of messages to clip.
            role (str): Unused.

        Returns:
            list: The same list, clipped.
        """
        def user_contents():
            for msg in messages:
                if msg['role'] == 'user' and isinstance(msg['content'], list):
                    yield msg['content']

        def is_image(item):
            return 'image' in (item.get('type') or '')

        img_count = 0
        for content in user_contents():
            for item in content:
                if item.get('type') == 'text':
                    item['text'] = self._collapse_observation(item['text'])
                elif is_image(item):
                    img_count += 1

        if img_count <= self.max_attached_imgs:
            return messages

        for content in user_contents():
            if img_count <= self.max_attached_imgs:
                break
            # Build the kept list first: deleting while iterating skips items.
            kept = []
            for item in content:
                if is_image(item) and img_count > self.max_attached_imgs:
                    img_count -= 1
                else:
                    kept.append(item)
            if len(kept) == len(content):
                continue
            content[:] = kept
            has_images_left = any(is_image(item) for item in content)
            if not has_images_left and content and content[0].get('type') == 'text':
                # No screenshot is attached any more, so drop the pointer to it.
                content[0]['text'] = content[0]['text'].replace(self._SCREENSHOT_NOTE, "").strip()
        return messages

    def process_batch_response(self, batch_response, batch_observation=None):
        """Apply :meth:`process_llm_response` to each response.

        Args:
            batch_response (list[str]): Model responses.
            batch_observation (list[dict], optional): Matching observations;
                when omitted, ``None`` is passed for every response.

        Returns:
            tuple: ``(action_keys, infos, messages)`` as three parallel lists.
        """
        if batch_observation is None:
            batch_observation = [None] * len(batch_response)

        action_keys, infos, messages = [], [], []
        for response, observation in zip(batch_response, batch_observation):
            action_key, info, message = self.process_llm_response(response, observation)
            action_keys.append(action_key)
            infos.append(info)
            messages.append(message)
        return action_keys, infos, messages

    def process_llm_response(self, response, observation):
        """
        Process the LLM response to extract action information.

        Args:
            response (str): The LLM response text.
            observation (dict | None): Current observation; only its ``tree``
                is read, and only when ``self.double_check`` is enabled.

        Returns:
            tuple: ``(action_key, info, message)`` where ``message`` is the
            assistant message wrapping ``response``. ``action_key`` and
            ``info`` are ``None`` when the response lacks a required section,
            a click/type action has no ``[n]`` label, or the double check fails.
        """
        role = "actor"
        message = {
            'role': 'assistant',
            'content': [
                {'type': 'text', 'text': response},
            ]
        }

        if self.verbose:
            print("-"*20, "RESPONSE", "-"*20)
            print(response)

        if not isinstance(response, str):
            return None, None, message

        # Every section tag (e.g. "Thought:", "Action:") must be present.
        section_pattern = self.prompt_templates[role]["pattern"]
        if any(tag not in response for tag in section_pattern.split("|")):
            return None, None, message

        action = re.split(section_pattern, response)[-1].strip()
        found = True

        lowered = action.lower()
        # An ANSWER may mention "click"/"type" in its text without being one.
        is_answer = action.startswith("ANSWER")
        if not is_answer and ("click" in lowered or "type" in lowered):
            labels = re.findall(r'\[(\d+)\]', action)
            if not labels:
                return None, None, message
            action_id = int(labels[0])

            where = re.search(r"where \[", action)
            if where:
                if self.double_check:
                    tree = (observation or {}).get('tree') or ''
                    label = f"[{action_id}]"
                    action_check = action[where.end() - 1:].replace("] is ", "] ")
                    # The described element must match a tree line carrying the label.
                    found = any(
                        label in line and calculate_overlap_percentage(action_check, line) == 1
                        for line in tree.split("\n")
                    )
                action = action[:where.start()].strip()

        if found:
            action_key, info = extract_information(action)
        else:
            action_key, info = None, None

        return action_key, info, message

    def _encode_image(self, image_path):
        """
        Encode an image to base64.

        Args:
            image_path (str): Path to the image file.

        Returns:
            str: Base64 encoded image string, or ``None`` if reading fails.
        """
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')
        except Exception as e:
            logging.error(f"Error encoding image: {e}")
            return None

    def summarize_trajectory(self, task, traj):
        """Build the prompt asking the model to summarise a successful trajectory.

        Args:
            task: Task description, interpolated into the prompt.
            traj: Action trajectory, interpolated into the prompt.

        Returns:
            list: ``[system_message, user_message]``
        """
        curr_msg = {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': f"You will be given a web-based task and a successful action trajectory to complete this task. Your goal is to summarize the actions into a cohesive paragraph so that it can be used as an instruction for people to complete the same task in the futre. You should remove any repetitive actions and the final task-specific answer. Just focus on the process to arrive at the answer. Be clear and concise. Output your answer in the following format:\nTask: repeat the task here.\nPlan: state your action summary here.\n\nNow solve the following problem. The task is {task}. The action trajectory is: {traj}"},
            ]
        }

        msg = [{
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}]
        }, curr_msg]

        return msg

    def plan_trajectory(self, task, example):
        """Build the prompt asking the model to rewrite a task as detailed steps.

        Args:
            task: The new task objective, interpolated into the prompt.
            example: Example rewrites, interpolated into the prompt.

        Returns:
            list: ``[system_message, user_message]``
        """
        curr_msg = {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': f"I have a simple task objective related to web browsing, rewrite it into a single paragraph of detailed step-by-step actions to achieve the task. When revising the objective, follow the rules:\n- Do not include any newlines, tabs, step numbers in the rewritten objective.\n- Follow the example as much as possible.\n- Pay attention to words like \"latest\", \"newest\", \"hottest\", which require clicking the dropdown menu and select \"New\" or \"Top\" with the correct time range.\n- When the subforum is not specified, choose a word (object or location) contained in the given objective.\nHere are the examples:\n{example}\n\nThe new task is: {task}"},
            ]
        }

        msg = [{
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}]
        }, curr_msg]

        return msg

    def process_batch_browsecomp_eval(self, batch_observation, batch_eval_info):
        """Build one grader prompt per (observation, eval_info) pair.

        Reads ``observation["task"]``, ``eval_info["answer"]`` and
        ``eval_info["reference_answer"]``.

        Returns:
            list: One ``[system_message, user_message]`` list per pair.
        """
        return [
            self.browsecomp_eval(observation["task"], eval_info["answer"], eval_info["reference_answer"])
            for observation, eval_info in zip(batch_observation, batch_eval_info)
        ]

    def browsecomp_eval(self, question, response, correct_answer):
        """Build the grader prompt judging *response* against *correct_answer*.

        Args:
            question: The question that was asked.
            response: The model's response to grade.
            correct_answer: The reference answer.

        Returns:
            list: ``[system_message, user_message]``
        """
        # ``\\%`` yields a literal backslash followed by a percent sign.
        GRADER_TEMPLATE = """
Judge whether the following [response] to [question] is correct or not based on the precise and unambiguous [correct_answer] below.

[question]: {question}

[response]: {response}

Your judgement must be in the format and criteria specified below:

extracted_final_answer: The final exact answer extracted from the [response]. Put the extracted answer as 'None' if there is no exact, final answer to extract from the response.

[correct_answer]: {correct_answer}

reasoning: Explain why the extracted_final_answer is correct or incorrect based on [correct_answer], focusing only on if there are meaningful differences between [correct_answer] and the extracted_final_answer. Do not comment on any background to the problem, do not attempt to solve the problem, do not argue for any answer different than [correct_answer], focus only on whether the answers match.

correct: Answer 'yes' if extracted_final_answer matches the [correct_answer] given above, or is within a small margin of error for numerical problems. Answer 'no' otherwise, i.e. if there if there is any inconsistency, ambiguity, non-equivalency, or if the extracted answer is incorrect.


confidence: The extracted confidence score between 0|\\%| and 100|\\%| from [response]. Put 100 if there is no confidence score available.
""".strip()
        grader_text = GRADER_TEMPLATE.format(
            question=question,
            correct_answer=correct_answer,
            response=response,
        )
        curr_msg = {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': grader_text},
            ]
        }

        msg = [{
            "role": "system",
            "content": [{"type": "text", "text": "You are a helpful assistant."}]
        }, curr_msg]

        return msg